Smart Waste Management: Fill Level Prediction + Route Optimization¶

This notebook bundles:

  1. Load dataset (bins + telemetry)
  2. Train Random Forest model to predict fill level
  3. Detect FULL/OVERFLOW bins
  4. Optimize garbage truck collection routes (20 trucks)
  5. Generate route map
In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

Load Datasets¶

In [2]:
bins_df = pd.read_csv("bins_master.csv")

import numpy as np
# Generate / update random fill levels dynamically
bins_df['fill_level_pct'] = np.random.randint(10, 100, size=len(bins_df))
telemetry_df = pd.read_csv("telemetry_24h.csv")
bins_df.head(), telemetry_df.head()
Out[2]:
(    bin_id   latitude  longitude  fill_level_pct
 0  HYD0001  17.349816  78.437772              16
 1  HYD0002  17.580286  78.465702              13
 2  HYD0003  17.492798  78.599092              64
 3  HYD0004  17.439463  78.419002              31
 4  HYD0005  17.262407  78.604377              19,
              timestamp   bin_id   latitude  longitude  ultrasonic_cm  \
 0  2025-01-12 00:00:00  HYD0001  17.349816  78.437772          92.35   
 1  2025-01-12 00:00:00  HYD0002  17.580286  78.465702          99.20   
 2  2025-01-12 00:00:00  HYD0003  17.492798  78.599092         101.78   
 3  2025-01-12 00:00:00  HYD0004  17.439463  78.419002          96.48   
 4  2025-01-12 00:00:00  HYD0005  17.262407  78.604377         101.25   
 
    fill_level_pct    status  
 0            5.86  NOT_FULL  
 1            2.33  NOT_FULL  
 2            0.07  NOT_FULL  
 3            3.68  NOT_FULL  
 4            1.81  NOT_FULL  )
In [3]:
bins_df = bins_df[bins_df['bin_id'] != 'bin_id'].reset_index(drop=True)
bins_df.head()
Out[3]:
bin_id latitude longitude fill_level_pct
0 HYD0001 17.349816 78.437772 16
1 HYD0002 17.580286 78.465702 13
2 HYD0003 17.492798 78.599092 64
3 HYD0004 17.439463 78.419002 31
4 HYD0005 17.262407 78.604377 19
In [4]:
#bins_df = pd.read_csv("bins_master.csv")

# Remove any rows where the bin_id column literally contains the word "bin_id"
# bins_df = bins_df[bins_df['bin_id'] != 'bin_id'].reset_index(drop=True)

# Convert lat/long to float (important)
bins_df['latitude'] = bins_df['latitude'].astype(float)
bins_df['longitude'] = bins_df['longitude'].astype(float)

print(bins_df.head())
    bin_id   latitude  longitude  fill_level_pct
0  HYD0001  17.349816  78.437772              16
1  HYD0002  17.580286  78.465702              13
2  HYD0003  17.492798  78.599092              64
3  HYD0004  17.439463  78.419002              31
4  HYD0005  17.262407  78.604377              19

Train RandomForest Model to Predict Fill Level¶

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

df = telemetry_df.copy()
df["timestamp"] = pd.to_datetime(df["timestamp"])
df["hour"] = df["timestamp"].dt.hour
df["dayofweek"] = df["timestamp"].dt.dayofweek

feature_cols = ["ultrasonic_cm", "latitude", "longitude", "hour", "dayofweek"]
X = df[feature_cols]
y = df["fill_level_pct"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestRegressor(n_estimators=300, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

preds = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, preds))
print("R^2:", r2_score(y_test, preds))
MAE: 1.262692105555555
R^2: 0.9934184184629896

Route Optimization (CVRP with OR-Tools)¶

In [6]:
# NOTE: Requires: pip install ortools
from math import radians, sin, cos, sqrt, asin
from ortools.constraint_solver import pywrapcp, routing_enums_pb2

def haversine(lat1, lon1, lat2, lon2):
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dlon/2)**2
    return 2 * 6371 * np.arcsin(np.sqrt(a))

# Select snapshot
timestamp = "2025-01-12 10:00:00"
snap = telemetry_df[telemetry_df["timestamp"] == timestamp]
snap = snap[snap["status"].isin(["FULL", "OVERFLOW"])]
use_df = snap[["bin_id","latitude","longitude","fill_level_pct"]].drop_duplicates()

# Depot = mean location
depot_lat = use_df["latitude"].mean()
depot_lon = use_df["longitude"].mean()

nodes = pd.concat([
    pd.DataFrame({"bin_id":["DEPOT"],"latitude":[depot_lat],"longitude":[depot_lon],"fill_level_pct":[0]}),
    use_df
], ignore_index=True)

# Demands proportional to fill above 80%
nodes["demand"] = (nodes["fill_level_pct"] - 80).clip(lower=0).astype(int)

# Distance matrix
coords = nodes[["latitude","longitude"]].to_numpy()
N = len(coords)
dist_matrix = np.zeros((N,N))
for i in range(N):
    for j in range(N):
        dist_matrix[i,j] = haversine(coords[i][0],coords[i][1],coords[j][0],coords[j][1])

num_vehicles = 20
manager = pywrapcp.RoutingIndexManager(N, num_vehicles, 0)
routing = pywrapcp.RoutingModel(manager)

def distance_callback(from_index, to_index):
    f = manager.IndexToNode(from_index)
    t = manager.IndexToNode(to_index)
    return int(dist_matrix[f][t] * 1000)

transit_callback_index = routing.RegisterTransitCallback(distance_callback)
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

demands = nodes["demand"].tolist()
capacity = 1000

def demand_callback(from_index):
    return demands[manager.IndexToNode(from_index)]

demand_callback_index = routing.RegisterUnaryTransitCallback(demand_callback)
routing.AddDimensionWithVehicleCapacity(demand_callback_index, 0, [capacity]*num_vehicles, True, "Capacity")

search_params = pywrapcp.DefaultRoutingSearchParameters()
search_params.first_solution_strategy = routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
search_params.local_search_metaheuristic = routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH
search_params.time_limit.FromSeconds(10)

solution = routing.SolveWithParameters(search_params)

routes = []
if solution:
    for v in range(num_vehicles):
        index = routing.Start(v)
        route = []
        while not routing.IsEnd(index):
            route.append(manager.IndexToNode(index))
            index = solution.Value(routing.NextVar(index))
        route.append(0)
        routes.append(route)

routes[:3]  # Show first 3 routes
Out[6]:
[[0, 0], [0, 0], [0, 0]]

Generate Combined Route Map (Folium)¶

In [7]:
# Create depot at center of all bins
DEPOT_LAT = bins_df['latitude'].mean()
DEPOT_LON = bins_df['longitude'].mean()

depot = pd.DataFrame([{
    'bin_id': 'DEPOT',
    'latitude': DEPOT_LAT,
    'longitude': DEPOT_LON,
    'fill_level_pct': 0
}])

# If your fill level column exists separately, attach it:
bins_df['fill_level_pct'] = bins_df.get('fill_level_pct', 50)  # default 50% if missing

nodes = pd.concat([depot, bins_df[['bin_id','latitude','longitude','fill_level_pct']]]).reset_index(drop=True)

# Safety clean
nodes = nodes.dropna(subset=['latitude','longitude']).reset_index(drop=True)
In [8]:
# create DEPOT + nodes table
DEPOT_LAT = bins_df['latitude'].mean()
DEPOT_LON = bins_df['longitude'].mean()

depot = pd.DataFrame([{
    'bin_id': 'DEPOT',
    'latitude': DEPOT_LAT,
    'longitude': DEPOT_LON,
    'fill_level_pct': 0
}])

nodes = pd.concat([depot, bins_df[['bin_id','latitude','longitude','fill_level_pct']]], ignore_index=True)
In [9]:
import folium

# Center map on city
m = folium.Map(location=[nodes["latitude"].mean(), nodes["longitude"].mean()], zoom_start=11)

# Draw each vehicle route
colors = ["red","blue","green","purple","orange","darkred","lightred","beige",
          "darkblue","darkgreen","cadetblue","darkpurple","pink","lightblue",
          "lightgreen","gray","black","lightgray","brown","cyan"]

for v, route in enumerate(routes):
    coords = [(nodes.iloc[n]["latitude"], nodes.iloc[n]["longitude"]) for n in route]
    folium.PolyLine(coords, color=colors[v % len(colors)], weight=3, opacity=0.8,
                    tooltip=f"Vehicle {v}").add_to(m)

# Add bin markers (color-coded by fill level)
for i, row in nodes.iterrows():
    lat = row["latitude"]
    lon = row["longitude"]
    fill = row["fill_level_pct"]
    bid = row["bin_id"]

    if fill >= 80:
        color = "red"
    elif fill >= 50:
        color = "orange"
        # (You can also use "yellow" but orange is more visible on map tiles)
    else:
        color = "green"

    folium.CircleMarker(
        (lat, lon),
        radius=5,
        color=color,
        fill=True,
        fill_opacity=0.9,
        popup=f"{bid} | Fill Level: {fill}%"
    ).add_to(m)

m.save("combined_routes_map_colored.html")
m
Out[9]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]: